library(simona)
# Build the Gene Ontology DAG from the GO.db package. `org_db` names the
# organism annotation package (here the human one); the printed summary
# below reports that annotations are available on the resulting object.
dag <- create_ontology_DAG_from_GO_db(org_db = "org.Hs.eg.db")

# Auto-print the object to show its summary (terms, relations, depth, ...).
dag
## An ontology_DAG object:
##   Source: GO BP / GO.db package 
##   27942 terms / 55956 relations
##   Root: GO:0008150 
##   Terms: GO:0000001, GO:0000002, GO:0000003, GO:0000011, ...
##   Max depth: 18 
##   Avg number of parents: 2.00
##   Aspect ratio: 363.92:1 (based on the longest distance to root)
##                 782.78:1 (based on the shortest distance to root)
##   Relations: is_a, part_of
##   Annotations are available.
## 
## With the following columns in the metadata data frame:
##   id, name, definition

Let’s compare the available term similarity methods, using the GO Biological Process (BP) ontology.

# Fix the RNG state so the random selection of terms below is reproducible.
set.seed(123)

# Annotation-based information content per term; terms whose IC is NA are
# dropped so that only terms with a defined IC can be sampled.
ic <- term_IC(dag, method = "IC_annotation")
ic <- ic[!is.na(ic)]

# Draw 500 term IDs (sample() draws without replacement by default).
go_id <- sample(names(ic), 500)

# Compute the similarity among the sampled terms once per supported method.
# Iterating over a self-named vector makes lapply() return a list whose
# names are the method names.
lt <- lapply(
    setNames(nm = all_term_sim_methods()),
    function(method) term_sim(dag, go_id, method)
)

# One column per method. Each column holds the strictly-lower triangle of
# that method's result, i.e. every unordered pair of terms exactly once.
# NOTE(review): assumes term_sim() returns a square matrix over go_id -- confirm.
df <- as.data.frame(lapply(lt, function(sim_mat) {
    sim_mat[lower.tri(sim_mat)]
}))

# Depth of the lowest common ancestor (LCA) for the sampled terms, flattened
# with the same lower.tri() pattern used for the similarity matrices.
# NOTE(review): presumably the pair order matches the rows of `df` -- confirm
# that LCA_depth() returns a matrix ordered like go_id.
# The variable deliberately keeps the name `LCA_depth` (shadowing the function)
# because later code reads it under that name; R still resolves the call below
# to the function, since non-function objects are skipped when looking up a
# function.
LCA_depth <- local({
    depth_mat <- LCA_depth(dag, go_id)
    depth_mat[lower.tri(depth_mat)]
})

Next, draw a heatmap of the Pearson correlations between the semantic similarity methods.

# Correlation between every pair of methods (columns of `df`).
# "pairwise.complete.obs" computes each coefficient from the rows where both
# columns are non-missing, so an NA in one method does not discard that row
# for all other methods. Pearson is cor()'s default, spelled out here.
# The result is stored under the name `cor` because later code reads it there.
cor <- cor(df, use = "pairwise.complete.obs", method = "pearson")

library(ComplexHeatmap)

# Auto-printing the Heatmap object draws it.
Heatmap(
    cor,
    name = "correlation",
    column_title = "Pearson correlation"
)

# Methods left out of the focused comparison that follows.
excluded <- c("Sim_Jiang_1997",
    "Sim_Dice", "Sim_Kappa", "Sim_Jaccard", "Sim_Overlap",
    "Sim_AIC_2014", "Sim_universal", "Sim_HRSS_2013")

# Select with a logical mask instead of `-which(...)`: if none of the names
# above matched a column, which() would return integer(0), and negative
# indexing with an empty vector selects *nothing*, silently producing an
# empty matrix / data frame. The mask keeps everything in that case.
keep <- !(colnames(df) %in% excluded)

# drop = FALSE keeps the matrix / data-frame shape even if a single method
# remains.
cor2 <- cor[keep, keep, drop = FALSE]
df2 <- df[, keep, drop = FALSE]

# Heatmap of the correlations among the remaining methods.
Heatmap(cor2, name = "correlation", column_title = "Pearson correlation")

# Group label (1-4) for each similarity method. The labels are used to colour
# the MDS plot, and the order of the names fixes the panel order of the
# scatterplot matrix, so the entries must stay in this order.
group <- c(
    # group 1
    "Sim_Pekar_2002"       = 1,
    "Sim_Stojanovic_2001"  = 1,
    "Sim_WP_1994"          = 1,
    "Sim_Shenoy_2012"      = 1,
    "Sim_Li_2003"          = 1,
    "Sim_Wang_edge_2012"   = 1,
    # group 2
    "Sim_SSDD_2013"        = 2,
    "Sim_RSS_2013"         = 2,
    "Sim_Zhong_2002"       = 2,
    "Sim_Slimani_2006"     = 2,
    # group 3
    "Sim_Shen_2010"        = 3,
    "Sim_Zhang_2006"       = 3,
    "Sim_EISI_2015"        = 3,
    "Sim_XGraSM_2013"      = 3,
    "Sim_Lin_1998"         = 3,
    "Sim_Resnik_1999"      = 3,
    "Sim_FaITH_2010"       = 3,
    "Sim_Relevance_2006"   = 3,
    "Sim_SimIC_2010"       = 3,
    # group 4
    "Sim_Wang_2007"        = 4,
    "Sim_Ancestor"         = 4,
    "Sim_GOGO_2018"        = 4,
    "Sim_AlMubaid_2006"    = 4,
    "Sim_Rada_1989"        = 4,
    "Sim_Leocock_1998"     = 4,
    "Sim_Resnik_edge_2005" = 4
)
library(ggrepel)
library(ggplot2)
# Classical multidimensional scaling of the methods, using 1 - correlation as
# the dissimilarity. cmdscale() returns two coordinates per item by default,
# which become the plotting columns "x" and "y".
loc <- as.data.frame(cmdscale(as.dist(1 - cor2)))
names(loc) <- c("x", "y")

# Row names carry the method names; copy them into a column for labelling.
loc[["method"]] <- rownames(loc)

# Look up each method's group label by name.
loc[["group"]] <- group[rownames(loc)]

# Scatter plot of the MDS coordinates: one point per method, coloured by its
# group and labelled with the method name (labels repelled to avoid overlap;
# the text layer is kept out of the legend).
ggplot(
    loc,
    aes(x = x, y = y, label = method, colour = factor(group))
) +
    geom_point() +
    geom_text_repel(show.legend = FALSE) +
    labs(
        title = "MDS based on the correlation between similarities",
        x = "Dimension 1",
        y = "Dimension 2"
    )

Select an individual similarity heatmap by: Use order from Sim_Lin_1998

Prev method: Sim_Ancestor Curr method: Sim_Lin_1998 Next method: Sim_Resnik_1999

# Plot a random subset of 5000 term pairs (rows of df2) to keep the
# scatterplot matrix light. sample() with a single positive number n draws
# from 1..n, which is exactly what sample.int() does.
ind <- sample.int(nrow(df2), 5000)

# Scatterplot matrix of the methods, in the order given by names(group).
# Points are coloured by LCA depth via the integer colour palette.
# NOTE(review): the +1 presumably keeps depth 0 from mapping to colour
# index 0, which base graphics treats as the background colour -- confirm.
pairs(
    df2[ind, names(group)],
    pch = ".",
    gap = 0,
    col = LCA_depth[ind] + 1
)

# Record the R version, platform and package versions used for this run.
utils::sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.2.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] C/UTF-8/C/C/C/C
## 
## time zone: Europe/Berlin
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] ggrepel_0.9.4         ggplot2_3.4.4         ComplexHeatmap_2.16.0
## [4] simona_0.99.10        knitr_1.44            rmarkdown_2.25       
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.0        farver_2.1.1            dplyr_1.1.3            
##  [4] blob_1.2.4              Biostrings_2.68.1       bitops_1.0-7           
##  [7] fastmap_1.1.1           RCurl_1.98-1.12         digest_0.6.33          
## [10] lifecycle_1.0.3         cluster_2.1.4           KEGGREST_1.40.1        
## [13] RSQLite_2.3.1           magrittr_2.0.3          compiler_4.3.1         
## [16] rlang_1.1.1             sass_0.4.7              tools_4.3.1            
## [19] igraph_1.5.1            utf8_1.2.3              yaml_2.3.7             
## [22] labeling_0.4.3          bit_4.0.5               scatterplot3d_0.3-44   
## [25] xml2_1.3.5              RColorBrewer_1.1-3      withr_2.5.1            
## [28] BiocGenerics_0.46.0     stats4_4.3.1            fansi_1.0.5            
## [31] colorspace_2.1-0        GO.db_3.17.0            scales_1.2.1           
## [34] iterators_1.0.14        cli_3.6.1               crayon_1.5.2           
## [37] generics_0.1.3          ragg_1.2.6              RcppParallel_5.1.7     
## [40] httr_1.4.7              rjson_0.2.21            DBI_1.1.3              
## [43] cachem_1.0.8            zlibbioc_1.46.0         parallel_4.3.1         
## [46] AnnotationDbi_1.62.2    XVector_0.40.0          proxyC_0.3.3           
## [49] matrixStats_1.0.0       vctrs_0.6.4             Matrix_1.6-1.1         
## [52] jsonlite_1.8.7          IRanges_2.34.1          GetoptLong_1.0.5       
## [55] S4Vectors_0.38.2        bit64_4.0.5             clue_0.3-65            
## [58] systemfonts_1.0.5       magick_2.8.0            foreach_1.5.2          
## [61] jquerylib_0.1.4         glue_1.6.2              codetools_0.2-19       
## [64] Polychrome_1.5.1        shape_1.4.6             gtable_0.3.4           
## [67] GenomeInfoDb_1.36.4     munsell_0.5.0           tibble_3.2.1           
## [70] pillar_1.9.0            htmltools_0.5.6.1       GenomeInfoDbData_1.2.10
## [73] circlize_0.4.15         R6_2.5.1                textshaping_0.3.7      
## [76] doParallel_1.0.17       evaluate_0.22           Biobase_2.60.0         
## [79] lattice_0.21-9          png_0.1-8               memoise_2.0.1          
## [82] bslib_0.5.1             Rcpp_1.0.11             org.Hs.eg.db_3.17.0    
## [85] xfun_0.40               pkgconfig_2.0.3         GlobalOptions_0.1.2